*************************************************************************
* This do files plots all the figures that describes the data.
* Outputs: Figure 1, Figure 3, Figure A2, Figure A3, Figure A4, Figure A7
* Last modified by: Yukun Wang
* Date: 06/24/2020
*************************************************************************

clear all
set more off
set scheme s1color

***Set directory
capture cd "/Users/Dropbox/Chinese food exports"


******************
*    Figure 1    *
******************

***import data
use "Data/ChineseCustomsData/customs_yearly_2000_2013.dta"

***keep only exports
keep if import==0

***standerlize hs code
tostring hs_id, replace
replace hs_id="0"+hs_id if length(hs_id)==7
gen hs_id_2dg=substr(hs_id,1,2)
destring hs_id_2dg, replace

***sum value by year
bys year:egen yearly_value=sum(value)

***keep only food products
keep if hs_id_2dg<40
merge m:1 hs_id using "Data/Codebooks/HS_code_for_food.dta",update   //merge datasets
drop if _merge==1 | _merge==2   //drop nonfood products

***sum value by year
bys year:egen food_yearly_value=sum(value)
drop description _merge

***keep dairy products
keep if hs_id=="04011000" | hs_id=="04012000" | hs_id=="04013000" | hs_id=="04014000"   ///
 | hs_id=="04015000" | hs_id=="04021000" | hs_id=="04022100" | hs_id=="04022900"   ///
 | hs_id=="04029100" | hs_id=="04029900" | hs_id=="04031000" | hs_id=="04039000"   ///
 | hs_id=="04041000" | hs_id=="04049000" | hs_id=="04051000" | hs_id=="04052000"   ///
 | hs_id=="04059000" | hs_id=="04061000" | hs_id=="04062000" | hs_id=="04063000"   ///
 | hs_id=="04064000" | hs_id=="04069000" | hs_id=="19011000" | hs_id=="19019000"   ///
 | hs_id=="35011000" | hs_id=="35022000"


**********************************Dairy Products********************************
***sum value&quantity by year
bys year: egen dairy_yearly_value=sum(value)
bys year:egen dairy_yearly_quantity=sum(quantity)

***change units to million dollars
replace yearly_value= yearly_value/10^6
replace food_yearly_value= food_yearly_value/10^6
replace dairy_yearly_value = dairy_yearly_value/10^6
replace dairy_yearly_quantity = dairy_yearly_quantity/10^6
label var yearly_value "unit:million dollars"
label var food_yearly_value "unit:million dollars"
label var dairy_yearly_value "unit:million dollars"
label var dairy_yearly_quantity "unit:kiloton"

***draw line graphs
duplicates drop year, force
tsset year
tsfill   //fill missing date observations
replace dairy_yearly_value=0 if dairy_yearly_value==.
replace dairy_yearly_quantity=0 if dairy_yearly_quantity==.

twoway (line dairy_yearly_value year, xaxis(1) yaxis(1) xvarlab(time) xlabel(2000(1)2013) color(black) ytitle("Value of Exports (in Million Dollars)",axis(1))) ///
	   (line dairy_yearly_quantity year, yaxis(2) ytitle("Quantity of Exports(in Million kiloton)",axis(2)) xline(2008.5, lcolor(black)) color(black) lpattern(dash)), ///
	   legend(order(1 2) cols(2) label(1 "Value") label(2 "Quantity")) ///
	   xtitle("Year") scale(0.85) graphregion(color(white)) bgcolor(white)

***save graphs
graph export "Results/main_figures/figure_1.png", as(png) replace



******************
*    Figure 3    *
******************

use "Data/google_trends/gtrends_orig.dta", clear
gen high=(gnewsindex_orig>=1.6177370) //use Belgium as cutoff
separate gnewsindex_orig, by(high) veryshortlabel
label var gnewsindex_orig0 "Low"
label var gnewsindex_orig1 "High"

graph hbar (asis) gnewsindex_orig?, over(country_english, label(labsize(small)) sort(gnewsindex_orig) descending) bar(1,color(black) fintensity(50)) bar(2,color(black))

graph export "Results/main_figures/figure_3.png", as(png) replace




*******************
*    Figure A2    *
*******************

***Imports of dairy

use "Data/ChineseCustomsData/customs_yearly_2000_2013.dta",clear

keep if import==1

replace value=value/10^6  //change to million dollars

merge m:1 hs_id using "Data/product_codebook/dairy_products_codebook"
drop if _m==2
g dairy=0 if _m==1
replace dairy=1 if _m==3
drop _m

g milkpowder=1 if hs_id=="04021000" | hs_id=="04022100" | hs_id=="04022900"

bys year: egen tot=total(value)
g lntot=log(tot) 

bys year: egen totdairy=total(value) if dairy==1
g lntotdairy=log(totdairy) if dairy==1

bys year: egen totmilk=total(value) if milkpowder==1
g lntotmilk=log(totmilk) if milkpowder==1

collapse (mean) tot* lntot*, by(year)

levelsof year, local(xlevels)

graph twoway (line tot year,sort lc(gs3) yaxis(1) ytitle("Value of imports (in million USD)", axis(1))) ///
			(line totdairy year,sort lc(gs3) lp(dash) yaxis(2) ytitle("Dairy and milk powder imports (in million USD)",axis(2))) ///
			(line totmilk year, sort lc(gs3) lp(shortdash) yaxis(2)), ///
			legend(order(1 2 3) cols(3) label(1 "Total imports") label(2 "Dairy imports") label(3 "Milk powder imports") ring(1) pos(6) ) ///
			xlabel(`xlevels') ytitle("Value of imports (in million USD)") ylabel(, labsize(small)) xtitle("Year") xline(2008.5, lcolor(red))  ///
			graphregion(color(white)) bgcolor(white)  scale(0.9)
			
graph export "Results/figures/figure_a2.pdf",replace



*******************
*    Figure A3    *
*******************

***Exports of dairy

use "Data/ChineseCustomsData/customs_yearly_2000_2013.dta",clear

keep if import==0 

replace value=value/10^6  //change to million dollars

merge m:1 hs_id using "Data/product_codebook/HS_code_for_food"
drop if _m==2
g food=0 if _m==1
replace food=1 if _m==3
drop _m

merge m:1 hs_id using "Data/product_codebook/dairy_products_codebook"
drop if _m==2
g dairy=0 if _m==1
replace dairy=1 if _m==3
drop _m

bys year: egen tot=total(value)
//g lntot=log(tot)

bys year food: egen tot_food=total(value)
g tot_food_n=.
forvalues t=2000(1)2013 {
	sum tot_food if year==`t'& food==1
	replace tot_food_n=r(mean) if year==`t'
	}

g pct_food=tot_food_n/tot

bys year dairy: egen tot_dairy=total(value)
g tot_dairy_n=.
forvalues t=2000(1)2013 {
	sum tot_dairy if year==`t'& dairy==1
	replace tot_dairy_n=r(mean) if year==`t'
	}

g pct_dairy=tot_dairy_n/tot
g pct_dairy2=tot_dairy_n/tot_food_n


g ones=1
bys year: g tag=_n

sum tot if year==2007		//1.2 trillion
sum tot_food_n if year==2007 //32 billion
sum tot_dairy_n if year==2007 //300 million; 


***Export growth by milk-powder intensive destinations 

preserve	//milk powder destinations

keep if hs_id=="04021000" | hs_id=="04022100" | hs_id=="04022900" 

keep if year<2008

collapse (sum) value,by(origin_id)

g diaryall=1

sum value,d
g diary50=(value>r(p50))
g diary90=(value>r(p90))

keep origin_id diaryall diary50 diary90

tempfile temp
save `temp',replace
 
restore

merge m:1 origin_id using `temp'
tab _m
drop if _m==2
drop _m


bys year: egen tot_diaryall=total(value) if diaryall==1
//g lntot_diaryall=log(tot_diaryall)

bys year: egen tot_diary50=total(value) if diary50==1
//g lntot_diary50=log(tot_diary50)

bys year: egen tot_diary90=total(value) if diary90==1
//g lntot_diary90=log(tot_diary90)

collapse (mean) tot*, by(year)

levelsof year, local(xlevels)

graph twoway (line tot year,sort lc(gs3)) ///
			(line tot_diary90 year,sort lc(gs3) lp(dash)), ///
			legend(order(1 2) cols(2) label(1 "Total exports") label(2 "Exports to major milk-powder importing countries") ring(1) pos(6) ) ///
			xlabel(`xlevels') ytitle("Value of exports (in million USD)") ylabel(, labsize(small)) xtitle("Year") xline(2008.5, lcolor(red))  ///
			graphregion(color(white)) bgcolor(white)  scale(0.9)
			
graph export "Results/appendix_figures/figure_a3.pdf",replace



*******************
*    Figure A4    *
*******************

***Load data
use "Data/Coded data/Customs/customs_yearly_2000_2013_firmproduct_level_step1.dta", clear
keep if food==1   //keep only food
drop if year==2008  //drop 2008
gen post=(year>=2009)
bys party_id: egen nondairyfirm=min(dairy)
bys party_id: egen dairyfirm=max(dairy)
replace party_id=party_id+"00" if nondairyfirm==0 & dairyfirm==1 & dairy==0   //coding the non-dairy part of the dairy firm as a new firm

tempfile dataexploring
save `dataexploring', replace


********************************
* Histogram of Num of Products *
********************************

************2000~2007************

use `dataexploring', clear
keep if post==0

***Collapse data at firm-product-year level
collapse (mean) food dairy affected_firms, by(party_id hs_id year)
bys party_id year: gen numofprodbyyear=_N

***Create a codebook for medium number of products
preserve
duplicates drop party_id year, force
collapse (median) numofprodbyyear, by(party_id)
ren numofprodbyyear mednumofprod
tempfile mednumofprod
save `mednumofprod', replace
restore

merge m:1 party_id using `mednumofprod'  //merge in medium number of products

***Collapse data at firm-product level
collapse (mean) dairy affected_firms mednumofprod, by(party_id hs_id)
bys party_id: gen totalnumofprod=_N   //calculate number of total products

***Collapse data at firm level
collapse (mean) dairy affected_firms mednumofprod totalnumofprod, by(party_id)
gen partynum=1

***Median for number of products
preserve
keep if affected_firms==1
collapse (sum) partynum, by(mednumofprod dairy)
replace mednumofprod=mednumofprod-0.1 if dairy==1
replace mednumofprod=mednumofprod+0.1 if dairy==0
graph twoway (bar partynum mednumofprod if dairy==1, xlabel(1(1)17) yaxis(1) ylabel(0(1)10, axis(1)) ytitle("Number of Contaminated Dairy Firms", axis(1)) barw(0.2) color(black*0.4))  ///
             (bar partynum mednumofprod if dairy==0, xlabel(1(1)17) yaxis(2) ylabel(0(1)10, axis(2)) ytitle("Number of Contaminated Food Firms", axis(2)) barw(0.2) color(black)),  ///
			 legend(order(1 2) label(1 "Dairy Firms") label(2 "Non-dairy Food Firms")) xtitle("Median for Number of Products") title("Number of Products (Median) for Contaminated Firms", size(medlarge)) subtitle("2000 - 2007")  ///
			 note("Notes: Dairy firms are not included in food firms", size(vsmall))
graph export "Results/figures/figure_a4_1.pdf",replace
restore


preserve
keep if affected_firms==0
egen p90=pctile(mednumofprod), p(90)
drop if mednumofprod>p90
collapse (sum) partynum, by(mednumofprod dairy)
replace mednumofprod=mednumofprod-0.1 if dairy==1
replace mednumofprod=mednumofprod+0.1 if dairy==0
graph twoway (bar partynum mednumofprod if dairy==1, xlabel(0(1)10) yaxis(1) ytitle("Number of Innocent+Non-Inspected Dairy Firms", axis(1)) barw(0.2) color(black*0.4))  ///
             (bar partynum mednumofprod if dairy==0, xlabel(0(1)10) yaxis(2) ytitle("Number of Innocent+Non-Inspected Food Firms", axis(2)) barw(0.2) color(black)),  ///
			 legend(order(1 2) label(1 "Dairy Firms") label(2 "Non-dairy Food Firms")) xtitle("Median for Number of Products") title("Number of Products (Median) for Innocent+Non-Inspected Firms", size(medlarge)) subtitle("2000 - 2007")  ///
			 note("Notes: Dairy firms are not included in food firms", size(vsmall))
graph export "Results/figures/figure_a4_2.pdf",replace
restore

************************************
* Histogram of Num of Destinations *
************************************

************2000~2007************

use `dataexploring', clear
keep if post==0

***Collapse data at firm-product-year level
collapse (mean) food dairy affected_firms, by(party_id origin_id year)
bys party_id year: gen numofdesnbyyear=_N

***Create a codebook for medium number of products
preserve
duplicates drop party_id year, force
collapse (median) numofdesnbyyear, by(party_id)
ren numofdesnbyyear mednumofdesn
tempfile mednumofdesn
save `mednumofdesn', replace
restore

merge m:1 party_id using `mednumofdesn'  //merge in medium number of destinations

***Collapse data at firm-product level
collapse (mean) dairy affected_firms mednumofdesn, by(party_id origin_id)
bys party_id: gen totalnumofdesn=_N   //calculate number of total destinations

***Collapse data at firm level
collapse (mean) dairy affected_firms mednumofdesn totalnumofdesn, by(party_id)
gen partynum=1

***Median for number of destinations
preserve
keep if affected_firms==1
egen p95=pctile(mednumofdesn), p(95)
drop if mednumofdesn>p95
collapse (sum) partynum, by(mednumofdesn dairy)
replace mednumofdesn=mednumofdesn-0.1 if dairy==1
replace mednumofdesn=mednumofdesn+0.1 if dairy==0
graph twoway (bar partynum mednumofdesn if dairy==1, xlabel(0(2)18) yaxis(1) ylabel(0(1)5, axis(1)) ytitle("Number of Contaminated Dairy Firms", axis(1)) barw(0.2) color(black*0.4))  ///
             (bar partynum mednumofdesn if dairy==0, xlabel(0(2)18) yaxis(2) ylabel(0(1)5, axis(2)) ytitle("Number of Contaminated Food Firms", axis(2)) barw(0.2) color(black)),  ///
			 legend(order(1 2) label(1 "Dairy Firms") label(2 "Non-dairy Food Firms")) xtitle("Median for Number of Destinations") title("Number of Destinations (Median) for Contaminated Firms", size(medlarge)) subtitle("2000 - 2007")  ///
			 note("Notes: Dairy firms are not included in food firms", size(vsmall))
graph export "Results/figures/figure_a4_3.pdf",replace
restore


preserve
keep if affected_firms==0
egen p90=pctile(mednumofdesn), p(90)
drop if mednumofdesn>p90
collapse (sum) partynum, by(mednumofdesn dairy)
replace mednumofdesn=mednumofdesn-0.1 if dairy==1
replace mednumofdesn=mednumofdesn+0.1 if dairy==0
graph twoway (bar partynum mednumofdesn if dairy==1, xlabel(1(1)5) yaxis(1) ytitle("Number of Innocent+Non-Inspected Dairy Firms", axis(1)) barw(0.2) color(black*0.4))  ///
             (bar partynum mednumofdesn if dairy==0, xlabel(1(1)5) yaxis(2) ytitle("Number of Innocent+Non-Inspected Food Firms", axis(2)) barw(0.2) color(black)),  ///
			 legend(order(1 2) label(1 "Dairy Firms") label(2 "Non-dairy Food Firms")) xtitle("Median for Number of Destinations") title("Number of Destinations (Median) for Innocent+Non-Inspected Firms", size(medlarge)) subtitle("2000 - 2007")  ///
			 note("Notes: Dairy firms are not included in food firms", size(vsmall))
graph export "Results/figures/figure_a4_4.pdf",replace
restore

********************************
* Histogram of Num of Products *
********************************

************2009~2013************

use `dataexploring', clear
keep if post==1

***Collapse data at firm-product-year level
collapse (mean) food dairy affected_firms, by(party_id hs_id year)
bys party_id year: gen numofprodbyyear=_N

***Create a codebook for medium number of products
preserve
duplicates drop party_id year, force
collapse (median) numofprodbyyear, by(party_id)
ren numofprodbyyear mednumofprod
tempfile mednumofprod
save `mednumofprod', replace
restore

merge m:1 party_id using `mednumofprod'  //merge in medium number of products

***Collapse data at firm-product level
collapse (mean) dairy affected_firms mednumofprod, by(party_id hs_id)
bys party_id: gen totalnumofprod=_N   //calculate number of total products

***Collapse data at firm level
collapse (mean) dairy affected_firms mednumofprod totalnumofprod, by(party_id)
gen partynum=1

***Median for number of products
preserve
keep if affected_firms==1
collapse (sum) partynum, by(mednumofprod dairy)
replace mednumofprod=mednumofprod-0.1 if dairy==1
replace mednumofprod=mednumofprod+0.1 if dairy==0
graph twoway (bar partynum mednumofprod if dairy==1, xlabel(0(1)9) yaxis(1) ylabel(0(1)8, axis(1)) ytitle("Number of Contaminated Dairy Firms", axis(1)) barw(0.2) color(black*0.4))  ///
             (bar partynum mednumofprod if dairy==0, xlabel(0(1)9) yaxis(2) ylabel(0(1)8, axis(2)) ytitle("Number of Contaminated Food Firms", axis(2)) barw(0.2) color(black)),  ///
			 legend(order(1 2) label(1 "Dairy Firms") label(2 "Non-dairy Food Firms")) xtitle("Median for Number of Products") title("Number of Products (Median) for Contaminated firms", size(medlarge)) subtitle("2009 - 2013")  ///
			 note("Notes: Dairy firms are not included in food firms", size(vsmall))
graph export "Results/figures/figure_a4_5.pdf",replace
restore


preserve
keep if affected_firms==0
egen p90=pctile(mednumofprod), p(90)
drop if mednumofprod>p90
collapse (sum) partynum, by(mednumofprod dairy)
replace mednumofprod=mednumofprod-0.1 if dairy==1
replace mednumofprod=mednumofprod+0.1 if dairy==0
graph twoway (bar partynum mednumofprod if dairy==1, xlabel(0(1)8) yaxis(1) ytitle("Number of Innocent+Non-Inspected Dairy Firms", axis(1)) barw(0.2) color(black*0.4))  ///
             (bar partynum mednumofprod if dairy==0, xlabel(0(1)8) yaxis(2) ytitle("Number of Innocent+Non-Inspected Food Firms", axis(2)) barw(0.2) color(black)),  ///
			 legend(order(1 2) label(1 "Dairy Firms") label(2 "Non-dairy Food Firms")) xtitle("Median for Number of Products") title("Number of Products (Median) for Innocent+Non-Inspected firms", size(medlarge)) subtitle("2009 - 2013")  ///
			 note("Notes: Dairy firms are not included in food firms", size(vsmall))
graph export "Results/figures/figure_a4_6.pdf",replace
restore



************************************
* Histogram of Num of Destinations *
************************************

************2009~2013************

use `dataexploring', clear
keep if post==1

***Collapse data at firm-product-year level
collapse (mean) food dairy affected_firms, by(party_id origin_id year)
bys party_id year: gen numofdesnbyyear=_N

***Create a codebook for medium number of products
preserve
duplicates drop party_id year, force
collapse (median) numofdesnbyyear, by(party_id)
ren numofdesnbyyear mednumofdesn
tempfile mednumofdesn
save `mednumofdesn', replace
restore

merge m:1 party_id using `mednumofdesn'  //merge in medium number of destinations

***Collapse data at firm-product level
collapse (mean) dairy affected_firms mednumofdesn, by(party_id origin_id)
bys party_id: gen totalnumofdesn=_N   //calculate number of total destinations

***Collapse data at firm level
collapse (mean) dairy affected_firms mednumofdesn totalnumofdesn, by(party_id)
gen partynum=1

***Median for number of destinations
preserve
keep if affected_firms==1
collapse (sum) partynum, by(mednumofdesn dairy)
replace mednumofdesn=mednumofdesn-0.1 if dairy==1
replace mednumofdesn=mednumofdesn+0.1 if dairy==0
graph twoway (bar partynum mednumofdesn if dairy==1, xlabel(0(2)24) yaxis(1) ylabel(0(1)5, axis(1)) ytitle("Number of Contaminated Dairy Firms", axis(1)) barw(0.2) color(black*0.4))  ///
             (bar partynum mednumofdesn if dairy==0, xlabel(0(2)24) yaxis(2) ylabel(0(1)5, axis(2)) ytitle("Number of Contaminated Food Firms", axis(2)) barw(0.2) color(black)),  ///
			 legend(order(1 2) label(1 "Dairy Firms") label(2 "Non-dairy Food Firms")) xtitle("Median for Number of Destinations") title("Number of Destinations (Median) for Contaminated Firms", size(medlarge)) subtitle("2009 - 2013")  ///
			 note("Notes: Dairy firms are not included in food firms", size(vsmall))
graph export "Results/figures/figure_a4_7.pdf",replace
restore


preserve
keep if affected_firms==0
egen p90=pctile(mednumofdesn), p(90)
drop if mednumofdesn>p90
collapse (sum) partynum, by(mednumofdesn dairy)
replace mednumofdesn=mednumofdesn-0.1 if dairy==1
replace mednumofdesn=mednumofdesn+0.1 if dairy==0
graph twoway (bar partynum mednumofdesn if dairy==1, xlabel(0(1)8) yaxis(1) ytitle("Number of Innocent+Non-Inspected Dairy Firms", axis(1)) barw(0.2) color(black*0.4))  ///
             (bar partynum mednumofdesn if dairy==0, xlabel(0(1)8) yaxis(2) ytitle("Number of Innocent+Non-Inspected Food Firms", axis(2)) barw(0.2) color(black)),  ///
			 legend(order(1 2) label(1 "Dairy Firms") label(2 "Non-dairy Food Firms")) xtitle("Median for Number of Destinations") title("Number of Destinations (Median) for Innocent+Non-Inspected Firms", size(medlarge)) subtitle("2009 - 2013")  ///
			 note("Notes: Dairy firms are not included in food firms", size(vsmall))
graph export "Results/figures/figure_a4_8.pdf",replace
restore


*******************
*    Figure A7    *
*******************


use "Data/Manufacturing survey/manufacturing_survey_cleaned_1998_2013",clear

ren hylb industry4digit
ren djzclx registration_type
ren gykgqk ownership_type
ren kysjn establishment_year
ren cyrs employment
ren cpxslr salesprofits
ren zjtrhj intermediates
ren zyywsr salesrevenue
ren ckjhz exportsvalue
g totalcosts=zyywcb+zycb
drop zycb zyywcb
	
g industry2digit=substr(industry4digit,1,2)
g dairy=inlist(industry4digit,"1493","1420") if year<2003
replace dairy=inlist(industry4digit,"1534","1440") if year>2002

replace salesrevenue=salesprofits+totalcosts if salesrevenue==.

keep if dairy==1

merge m:1 panelid using "Data/firm_codebooks/firmID_census2customs.dta"
keep if _m==3
drop _m

merge 1:1 party_id year using "Data/firm_codebooks/customs_firm_year_collapsed.dta"
keep if _m==3
drop _m

merge m:1 year using "Data/other_codebooks/usdtocnyexchangerate_2000_2013.dta"
drop if _m==2
drop _m
replace value=value*exchrate/10^3
keep panelid year value

tempfile fillinexp
save `fillinexp', replace
restore

merge 1:1 panelid year using `fillinexp'
drop _m

gen exportsvalue1=exportsvalue
replace exportsvalue1=value if exportsvalue1==.
replace exportsvalue1=0 if exportsvalue1==.

gen dsalesrevenue1=salesrevenue-exportsvalue1
replace dsalesrevenue1=. if dsalesrevenue1<0

replace dsalesrevenue1=dsalesrevenue1/10^3
replace exportsvalue1=exportsvalue1/10^3
g lndsalesrevenue1=log(dsalesrevenue1)

keep panelid year dsalesrevenue1 lndsalesrevenue1 exportsvalue1 salesrevenue exportsvalue

tempfile manu_dsales
save `manu_dsales', replace


use "Data/Coded data/manucensus_coded",clear
g ProvinceXPost=prov*post
keep if dairy==1


merge 1:1 panelid year using `manu_dsales'
drop _m

keep if inrange(year,2005,2013)
drop if year==2010


replace lndsalesrevenue1=. if lndsalesrevenue==.

keep if year==2007
merge m:1 panelid using "Data/firm_codebooks/firmID_census2customs.dta"
drop if _m==2
drop _m


gen exp_rev_value_share1=exportsvalue1/salesrevenue

replace exp_rev_value_share1=. if exp_rev_value_share1>1

label var exp_rev_value_share1 "Exporting Revenue Share"

histogram exp_rev_value_share1 if dairy==1 & exp_rev_value_share1!=0, frac
graph export "Results/appendix_figures/figure_a7.png", as(png) replace

